import requests as req
# Smoke-test the OSM "read node" endpoint with a single node and show the raw
# XML. The timeout keeps the cell from hanging forever if the server never
# answers (requests waits indefinitely by default).
print(req.get("https://api.openstreetmap.org/api/0.6/node/26162465", timeout=30).text)
<?xml version="1.0" encoding="UTF-8"?> <osm version="0.6" generator="CGImap 0.8.3 (906934 spike-07.openstreetmap.org)" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/"> <node id="26162465" visible="true" version="156" changeset="90482230" timestamp="2020-09-06T14:22:58Z" user="Iváns" uid="7579660" lat="53.9023340" lon="27.5618791"> <tag k="addr:country" v="BY"/> <tag k="addr:postcode" v="220000"/> <tag k="admin_level" v="2"/> <tag k="alt_name:be" v="Менск"/> <tag k="alt_name:gl" v="Мінск"/> <tag k="alt_name:vi" v="Minsk;Minxcơva"/> <tag k="capital" v="yes"/> <tag k="capital_ISO3166-1" v="yes"/> <tag k="ele" v="280"/> <tag k="int_name" v="Minsk"/> <tag k="is_in:continent" v="Europe"/> <tag k="is_in:country" v="Belarus"/> <tag k="is_in:country_code" v="BY"/> <tag k="name" v="Минск"/> <tag k="name:ar" v="مينسك"/> <tag k="name:ast" v="Minsk"/> <tag k="name:bat-smg" v="Minsks"/> <tag k="name:be" v="Мінск"/> <tag k="name:be-tarask" v="Менск"/> <tag k="name:bg" v="Минск"/> <tag k="name:bo" v="མིན་སིཀ།"/> <tag k="name:ckb" v="مینسک"/> <tag k="name:cs" v="Minsk"/> <tag k="name:csb" v="Mińsk"/> <tag k="name:cu" v="Мѣньскъ"/> <tag k="name:cv" v="Минск"/> <tag k="name:de" v="Minsk"/> <tag k="name:el" v="Μινσκ"/> <tag k="name:en" v="Minsk"/> <tag k="name:eo" v="Minsko"/> <tag k="name:es" v="Minsk"/> <tag k="name:et" v="Minsk"/> <tag k="name:fa" v="مینسک"/> <tag k="name:fi" v="Minsk"/> <tag k="name:fr" v="Minsk"/> <tag k="name:ga" v="Minsc"/> <tag k="name:gl" v="Minsk"/> <tag k="name:he" v="מינסק"/> <tag k="name:hi" v="मिन्स्क"/> <tag k="name:hr" v="Minsk"/> <tag k="name:hu" v="Minszk"/> <tag k="name:hy" v="Մինսկ"/> <tag k="name:ia" v="Minsk"/> <tag k="name:io" v="Minsk"/> <tag k="name:is" v="Minsk"/> <tag k="name:it" v="Minsk"/> <tag k="name:ja" v="ミンスク"/> <tag k="name:jbo" v="misk"/> <tag k="name:ka" v="მინსკი"/> <tag k="name:kk" v="Минск"/> <tag k="name:kn" v="ಮಿನ್ಸ್ಕ್"/> 
<tag k="name:ko" v="민스크"/> <tag k="name:ku" v="Mînsk"/> <tag k="name:kv" v="Минск"/> <tag k="name:ky" v="Минск"/> <tag k="name:la" v="Minscum"/> <tag k="name:lt" v="Minskas"/> <tag k="name:lv" v="Minska"/> <tag k="name:mhr" v="Минск"/> <tag k="name:mk" v="Минск"/> <tag k="name:ml" v="മിൻസ്ക്"/> <tag k="name:mr" v="मिन्स्क"/> <tag k="name:myv" v="Минск ош"/> <tag k="name:nds" v="Minsk"/> <tag k="name:nl" v="Minsk"/> <tag k="name:no" v="Minsk"/> <tag k="name:oc" v="Minsk"/> <tag k="name:os" v="Минск"/> <tag k="name:pl" v="Mińsk"/> <tag k="name:pnb" v="منسک"/> <tag k="name:prefix" v="город"/> <tag k="name:pt" v="Minsk"/> <tag k="name:ru" v="Минск"/> <tag k="name:rue" v="Мінск"/> <tag k="name:sah" v="Минскай"/> <tag k="name:sk" v="Minsk"/> <tag k="name:sl" v="Minsk"/> <tag k="name:sr" v="Минск"/> <tag k="name:sr-Latn" v="Minsk"/> <tag k="name:sv" v="Minsk"/> <tag k="name:szl" v="Mińsk"/> <tag k="name:ta" v="மின்ஸ்க்"/> <tag k="name:tg" v="Минск"/> <tag k="name:th" v="มินสก์"/> <tag k="name:tt" v="Минск"/> <tag k="name:udm" v="Минск"/> <tag k="name:ug" v="مىنىسكى"/> <tag k="name:uk" v="Мінськ"/> <tag k="name:ur" v="منسک"/> <tag k="name:vi" v="Minxcơ"/> <tag k="name:vo" v="Minsk"/> <tag k="name:wuu" v="明斯克"/> <tag k="name:yi" v="מינסק"/> <tag k="name:zh" v="明斯克"/> <tag k="nat_name" v="Мінск"/> <tag k="old_name" v="Менск"/> <tag k="old_name:be" v="Менск"/> <tag k="place" v="city"/> <tag k="population" v="1982444"/> <tag k="population:date" v="2018-01-01"/> <tag k="source:name:oc" v="Lo Congrès"/> <tag k="source:population" v="Белстат"/> <tag k="website" v="https://minsk.gov.by/"/> <tag k="wikidata" v="Q2280"/> <tag k="wikipedia" v="ru:Минск"/> <tag k="wikipedia:be" v="Мінск"/> <tag k="wikipedia:en" v="Minsk"/> <tag k="wikipedia:pl" v="Mińsk"/> </node> </osm>
import pandas as pd
import numpy as np
import math
import re
import aiohttp
import asyncio
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
from aiohttp import ClientSession
from collections import defaultdict
# Read the settlement list, keep only the rows that carry an OSM node id,
# store that id as an integer and switch to short ASCII column names.
data = pd.read_csv("punkty_belarusi_cleaned_1.csv", sep=";")
data = (
    data.loc[data["osm:Node ID"].notna()]
    .astype({"osm:Node ID": "int64"})
    .rename(
        columns={
            "Вобласць": "region",
            "Раён": "district",
            "Назва без націскаў": "blr_name",
            "Назва па-расейску": "rus_name",
            "osm:Node ID": "id",
        }
    )
)
data
| region | district | blr_name | rus_name | id | |
|---|---|---|---|---|---|
| 0 | Брэсцкая | <вобласць> | Брэст | Брест | 27171628 |
| 1 | Брэсцкая | <вобласць> | Баранавічы | Барановичи | 242978911 |
| 2 | Брэсцкая | <вобласць> | Пінск | Пинск | 242978912 |
| 3 | Брэсцкая | Баранавіцкі | Гарадзішча | Городище | 242979015 |
| 4 | Брэсцкая | Баранавіцкі | Амнявічы | Омневичи | 242992435 |
| ... | ... | ... | ... | ... | ... |
| 23966 | Мінская | Чэрвеньскі | Пятроўка | Петровка | 243022922 |
| 23967 | Мінская | Чэрвеньскі | Ратнае | Ратное | 243025750 |
| 23968 | Мінская | Чэрвеньскі | Стары Пруд | Старый Пруд | 243022754 |
| 23969 | Мінская | Чэрвеньскі | Хутар | Хутор | 243022771 |
| 23970 | Мінская | Чэрвеньскі | Юравічы | Юровичи | 243022914 |
23444 rows × 5 columns
# Placeholder coordinate columns; they hold NaN until the OSM lookup fills them.
data[["latitude", "longtitude"]] = np.nan
data
| region | district | blr_name | rus_name | id | latitude | longtitude | |
|---|---|---|---|---|---|---|---|
| 0 | Брэсцкая | <вобласць> | Брэст | Брест | 27171628 | NaN | NaN |
| 1 | Брэсцкая | <вобласць> | Баранавічы | Барановичи | 242978911 | NaN | NaN |
| 2 | Брэсцкая | <вобласць> | Пінск | Пинск | 242978912 | NaN | NaN |
| 3 | Брэсцкая | Баранавіцкі | Гарадзішча | Городище | 242979015 | NaN | NaN |
| 4 | Брэсцкая | Баранавіцкі | Амнявічы | Омневичи | 242992435 | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 23966 | Мінская | Чэрвеньскі | Пятроўка | Петровка | 243022922 | NaN | NaN |
| 23967 | Мінская | Чэрвеньскі | Ратнае | Ратное | 243025750 | NaN | NaN |
| 23968 | Мінская | Чэрвеньскі | Стары Пруд | Старый Пруд | 243022754 | NaN | NaN |
| 23969 | Мінская | Чэрвеньскі | Хутар | Хутор | 243022771 | NaN | NaN |
| 23970 | Мінская | Чэрвеньскі | Юравічы | Юровичи | 243022914 | NaN | NaN |
23444 rows × 7 columns
# Base URL of the OSM "read node" endpoint; the node id is appended to it.
request_prefix_url = "https://api.openstreetmap.org/api/0.6/node/"
# Tallies updated by the download code: responses that passed the HTTP status
# check, and responses in which no coordinates were found.
counter = failed_counter = 0
def handle_coordinates(response, index):
    """Extract a node's coordinates from an OSM XML response and store them.

    The first ``lat`` and ``lon`` attribute values found in ``response`` are
    written to the ``latitude`` / ``longtitude`` columns of row ``index`` in
    the global ``data`` frame. When either attribute is missing the row is
    left untouched and the global ``failed_counter`` is incremented.

    Args:
        response: Body text of the OSM "read node" API response.
        index: Row label in ``data`` to update.
    """
    global data
    global failed_counter
    # Accept an optional sign and an optional fractional part so southern or
    # western coordinates and whole-degree values are not counted as failures.
    number = r"(-?\d+(?:\.\d+)?)"
    # \b stops the pattern from matching inside a longer attribute name.
    latitude = re.search(r"\blat=\"" + number + r"\"", response)
    longtitude = re.search(r"\blon=\"" + number + r"\"", response)
    if latitude is None or longtitude is None:
        failed_counter += 1
        return
    data.at[index, "latitude"] = float(latitude.group(1))
    data.at[index, "longtitude"] = float(longtitude.group(1))
async def get_node_details(id, session):
    """Download the OSM XML description of a single node.

    Args:
        id: OSM node id; converted with ``str`` and appended to
            ``request_prefix_url``.
        session: The ``aiohttp.ClientSession`` used to issue the GET request.

    Returns:
        The response body as text, or an empty string when the request fails
        or the server answers with an error status. The global ``counter`` is
        incremented only for successful responses.
    """
    global counter
    url = request_prefix_url + str(id)
    try:
        # ``async with`` releases the connection once the body has been read.
        async with session.request(method="GET", url=url) as response:
            response.raise_for_status()
            body = await response.text()
    except (aiohttp.ClientError, asyncio.TimeoutError):
        # Return an empty body so the caller records the row as failed; a
        # request that never produced a response must not reach the return
        # statement with ``response`` unbound.
        return ""
    counter += 1
    return body
async def run_program(index, session):
    """Fetch and store the coordinates for one row of the global ``data`` frame.

    Reads the node id from ``data.at[index, "id"]``, downloads the node
    description with ``get_node_details`` and passes it to
    ``handle_coordinates``.

    Args:
        index: Row label in ``data``.
        session: HTTP session forwarded to ``get_node_details``.
    """
    global failed_counter
    try:
        response = await get_node_details(str(data.at[index, "id"]), session)
        handle_coordinates(response, index)
    except Exception:
        # Best effort: one bad row must not abort the surrounding gather(),
        # but it is counted as a failure rather than disappearing silently.
        failed_counter += 1
async with ClientSession() as session:
await asyncio.gather(*[run_program(index, session) for index in data.index])
print("From: {}\nFailed: {}".format(counter, failed_counter))
data = data[data['latitude'].notna()]
data = data[data['longtitude'].notna()]
data.to_csv("punkty_belarusi_with_cooridnates.csv", sep=";")
data
From: 23225 Failed: 214
| region | district | blr_name | rus_name | id | latitude | longtitude | |
|---|---|---|---|---|---|---|---|
| 0 | Брэсцкая | <вобласць> | Брэст | Брест | 27171628 | 52.093751 | 23.685185 |
| 1 | Брэсцкая | <вобласць> | Баранавічы | Барановичи | 242978911 | 53.132292 | 26.018416 |
| 2 | Брэсцкая | <вобласць> | Пінск | Пинск | 242978912 | 52.111361 | 26.102377 |
| 3 | Брэсцкая | Баранавіцкі | Гарадзішча | Городище | 242979015 | 53.326824 | 26.005677 |
| 4 | Брэсцкая | Баранавіцкі | Амнявічы | Омневичи | 242992435 | 53.355020 | 25.902420 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 23966 | Мінская | Чэрвеньскі | Пятроўка | Петровка | 243022922 | 53.694300 | 28.702980 |
| 23967 | Мінская | Чэрвеньскі | Ратнае | Ратное | 243025750 | 53.785280 | 28.748050 |
| 23968 | Мінская | Чэрвеньскі | Стары Пруд | Старый Пруд | 243022754 | 53.761984 | 28.620105 |
| 23969 | Мінская | Чэрвеньскі | Хутар | Хутор | 243022771 | 53.760812 | 28.684500 |
| 23970 | Мінская | Чэрвеньскі | Юравічы | Юровичи | 243022914 | 53.785000 | 28.691109 |
23225 rows × 7 columns
def get_ending(word, length=2):
    """Return the last ``length`` characters of ``word``.

    Args:
        word: The name to take the ending from.
        length: Number of trailing characters to keep (default 2).

    Returns:
        The suffix of ``word``; the whole word when it is shorter than
        ``length``, and an empty string when ``length`` is not positive.
    """
    # ``word[-0:]`` would be the whole word, so non-positive lengths are
    # handled explicitly.
    return word[-length:] if length > 0 else ""
# Endings of the Belarusian and Russian names, computed one column at a time
# rather than assigning cell by cell.
data["blr_ending"] = data["blr_name"].map(get_ending)
data["rus_ending"] = data["rus_name"].map(get_ending)
data
| region | district | blr_name | rus_name | id | latitude | longtitude | blr_ending | rus_ending | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Брэсцкая | <вобласць> | Брэст | Брест | 27171628 | 52.093751 | 23.685185 | ст | ст |
| 1 | Брэсцкая | <вобласць> | Баранавічы | Барановичи | 242978911 | 53.132292 | 26.018416 | чы | чи |
| 2 | Брэсцкая | <вобласць> | Пінск | Пинск | 242978912 | 52.111361 | 26.102377 | ск | ск |
| 3 | Брэсцкая | Баранавіцкі | Гарадзішча | Городище | 242979015 | 53.326824 | 26.005677 | ча | ще |
| 4 | Брэсцкая | Баранавіцкі | Амнявічы | Омневичи | 242992435 | 53.355020 | 25.902420 | чы | чи |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 23966 | Мінская | Чэрвеньскі | Пятроўка | Петровка | 243022922 | 53.694300 | 28.702980 | ка | ка |
| 23967 | Мінская | Чэрвеньскі | Ратнае | Ратное | 243025750 | 53.785280 | 28.748050 | ае | ое |
| 23968 | Мінская | Чэрвеньскі | Стары Пруд | Старый Пруд | 243022754 | 53.761984 | 28.620105 | уд | уд |
| 23969 | Мінская | Чэрвеньскі | Хутар | Хутор | 243022771 | 53.760812 | 28.684500 | ар | ор |
| 23970 | Мінская | Чэрвеньскі | Юравічы | Юровичи | 243022914 | 53.785000 | 28.691109 | чы | чи |
23225 rows × 9 columns
def handle_endings(names):
    """Count how often each name ending occurs.

    Args:
        names: Iterable of names; each is reduced to its ending with
            ``get_ending``.

    Returns:
        A ``defaultdict(int)`` mapping ending -> number of occurrences,
        ordered from most to least frequent (endings with equal counts keep
        the order in which they were first seen).
    """
    # Local import: the file only imports ``defaultdict`` from collections.
    from collections import Counter

    ending_counts = Counter(get_ending(name) for name in names)
    # most_common() yields (ending, count) pairs sorted by descending count.
    return defaultdict(int, ending_counts.most_common())
# Ending frequencies of the Belarusian names: printed in full, then tabulated
# with one row per ending (the first 50 rows are displayed).
blr_endings = handle_endings(data["blr_name"])
print(blr_endings)
endings_data = pd.DataFrame(list(blr_endings.items()), columns=["ending", "total"])
endings_data.head(50)
defaultdict(<class 'int'>, {'кі': 3153, 'чы': 2261, 'ка': 2239, 'ва': 2074, 'на': 1930, 'цы': 986, 'ны': 825, 'ча': 619, 'ца': 401, 'ае': 369, 'ры': 322, 'ая': 319, 'да': 301, 'лі': 274, 'ец': 258, 'ты': 237, 'ле': 216, 'лы': 203, 'ня': 200, 'нь': 198, 'ок': 190, 'ды': 189, 'ні': 189, '’е': 184, 'ск': 179, 'шы': 173, 'аў': 168, ' 2': 162, ' 1': 158, 'ін': 154, 'ія': 137, 'ль': 136, 'це': 128, 'гі': 119, 'се': 118, 'не': 116, 'хі': 115, 'та': 111, 'ша': 105, 'ыя': 104, 'ын': 102, 'жа': 99, 'ра': 97, 'ор': 95, 'зе': 95, 'сы': 92, 'ое': 81, 'ці': 78, 'ло': 77, 'ік': 71, 'ўе': 67, 'вы': 66, 'но': 65, 'оў': 62, 'ля': 61, 'бы': 56, 'ак': 55, 'ар': 51, 'зы': 50, 'ць': 50, 'аі': 45, 'га': 40, 'мы': 38, 'эц': 37, 'зь': 37, 'ха': 37, 'ай': 37, 'зі': 36, 'цк': 36, 'жы': 36, 'ог': 35, 'ма': 33, 'пы': 33, 'еж': 31, 'ан': 30, 'ба': 29, 'ач': 29, 'ес': 29, 'од': 29, 'ла': 24, 'ст': 22, 'са': 21, 'ет': 20, 'сі': 20, 'па': 18, 'ал': 18, 'аг': 18, ' 3': 16, 'бр': 16, 'аж': 16, 'уд': 15, 'еі': 15, 'ея': 14, 'за': 14, 'ад': 14, 'ер': 13, 'уб': 12, 'сь': 12, 'ык': 12, 'уг': 10, 'уі': 10, 'он': 10, 'ёў': 10, 'ір': 9, 'ох': 9, 'як': 9, 'ўі': 9, 'цё': 9, 'іч': 9, 'яі': 8, 'оз': 8, 'ук': 8, 'еў': 8, 'ас': 8, 'аш': 7, 'еч': 7, 'цо': 7, '’і': 7, 'іж': 7, 'яя': 7, 'эс': 7, 'эя': 6, '’я': 6, 'ол': 6, 'ут': 6, 'ыр': 6, 'рг': 6, 'ый': 6, 'уп': 6, 'ел': 5, 'шч': 5, 'оп': 5, 'ім': 5, 'ой': 5, 'чо': 5, 'мя': 5, 'рд': 5, 'ёк': 5, 'ыш': 4, 'ыч': 4, 'яе': 4, 'уя': 4, 'ат': 4, 'ўё': 4, 'оі': 4, 'ёл': 4, 'рн': 4, 'ьч': 4, 'ях': 4, 'пі': 4, 'ёс': 3, 'яж': 3, 'уж': 3, 'уш': 3, 'ац': 3, 'уй': 3, 'ко': 3, 'ыж': 3, 'уч': 3, 'ун': 3, 'ом': 3, 'ам': 3, 'мо': 2, 'то': 2, 'ус': 2, 'ек': 2, 'яч': 2, 'шо': 2, 'юч': 2, 'нт': 2, 'ум': 2, 'лё': 2, '’ё': 2, 'яг': 2, 'ож': 2, 'эп': 2, 'рс': 2, 'рч': 2, 'іш': 2, 'ьс': 2, 'із': 2, 'эй': 2, 'зё': 2, 'ій': 1, 'ро': 1, 'ыі': 1, 'Ор': 1, 'ух': 1, 'рэ': 1, 'яз': 1, 'лм': 1, 'Яя': 1, ' 5': 1, ' 4': 1, ' 6': 1, 'кт': 1, 'Шо': 1, 'рф': 1, 'сё': 1, 'ех': 1, 'ТС': 1, 'от': 1, 
'ей': 1, 'оя': 1, 'ёж': 1, 'ўж': 1, 'аз': 1, 'зд': 1, 'рп': 1, 'лк': 1, 'ві': 1, 'ўя': 1, 'юб': 1, 'д)': 1, 'му': 1, 'юн': 1, 'ур': 1, 'нг': 1, 'пр': 1, 'ош': 1, 'эў': 1, 'ёз': 1, 'эм': 1, 'юм': 1, 'ья': 1, 'ён': 1, 'юі': 1, 'Яр': 1, 'аб': 1})
| ending | total | |
|---|---|---|
| 0 | кі | 3153 |
| 1 | чы | 2261 |
| 2 | ка | 2239 |
| 3 | ва | 2074 |
| 4 | на | 1930 |
| 5 | цы | 986 |
| 6 | ны | 825 |
| 7 | ча | 619 |
| 8 | ца | 401 |
| 9 | ае | 369 |
| 10 | ры | 322 |
| 11 | ая | 319 |
| 12 | да | 301 |
| 13 | лі | 274 |
| 14 | ец | 258 |
| 15 | ты | 237 |
| 16 | ле | 216 |
| 17 | лы | 203 |
| 18 | ня | 200 |
| 19 | нь | 198 |
| 20 | ок | 190 |
| 21 | ды | 189 |
| 22 | ні | 189 |
| 23 | ’е | 184 |
| 24 | ск | 179 |
| 25 | шы | 173 |
| 26 | аў | 168 |
| 27 | 2 | 162 |
| 28 | 1 | 158 |
| 29 | ін | 154 |
| 30 | ія | 137 |
| 31 | ль | 136 |
| 32 | це | 128 |
| 33 | гі | 119 |
| 34 | се | 118 |
| 35 | не | 116 |
| 36 | хі | 115 |
| 37 | та | 111 |
| 38 | ша | 105 |
| 39 | ыя | 104 |
| 40 | ын | 102 |
| 41 | жа | 99 |
| 42 | ра | 97 |
| 43 | ор | 95 |
| 44 | зе | 95 |
| 45 | сы | 92 |
| 46 | ое | 81 |
| 47 | ці | 78 |
| 48 | ло | 77 |
| 49 | ік | 71 |
# Zero-initialised columns for the per-ending mean coordinates.
endings_data[["average_latitude", "average_longtitude"]] = 0.0
endings_data
| ending | total | average_latitude | average_longtitude | |
|---|---|---|---|---|
| 0 | кі | 3153 | 0.0 | 0.0 |
| 1 | чы | 2261 | 0.0 | 0.0 |
| 2 | ка | 2239 | 0.0 | 0.0 |
| 3 | ва | 2074 | 0.0 | 0.0 |
| 4 | на | 1930 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... |
| 224 | ья | 1 | 0.0 | 0.0 |
| 225 | ён | 1 | 0.0 | 0.0 |
| 226 | юі | 1 | 0.0 | 0.0 |
| 227 | Яр | 1 | 0.0 | 0.0 |
| 228 | аб | 1 | 0.0 | 0.0 |
229 rows × 4 columns
# Mean coordinates of all settlements that share an ending. The per-ending
# coordinate sums are computed in one grouped pass and assigned rather than
# accumulated, so re-running the cell does not inflate the averages.
coordinate_sums = data.groupby(data["blr_name"].map(get_ending))[["latitude", "longtitude"]].sum()
# An ending with no rows in ``data`` keeps a zero sum, hence a zero average.
endings_data["average_latitude"] = (
    endings_data["ending"].map(coordinate_sums["latitude"]).fillna(0.0) / endings_data["total"]
)
endings_data["average_longtitude"] = (
    endings_data["ending"].map(coordinate_sums["longtitude"]).fillna(0.0) / endings_data["total"]
)
endings_data
| ending | total | average_latitude | average_longtitude | |
|---|---|---|---|---|
| 0 | кі | 3153 | 54.116258 | 27.397853 |
| 1 | чы | 2261 | 53.541833 | 27.176313 |
| 2 | ка | 2239 | 53.675438 | 28.807168 |
| 3 | ва | 2074 | 54.215526 | 28.233761 |
| 4 | на | 1930 | 54.270410 | 27.920103 |
| ... | ... | ... | ... | ... |
| 224 | ья | 1 | 54.416310 | 27.291410 |
| 225 | ён | 1 | 54.374682 | 28.874564 |
| 226 | юі | 1 | 53.854980 | 27.309780 |
| 227 | Яр | 1 | 53.602551 | 28.048780 |
| 228 | аб | 1 | 53.807170 | 28.596730 |
229 rows × 4 columns
# Min-max scale both coordinates to [0, 1] over the observed range, using
# column arithmetic rather than a per-row loop.
min_latitude = data["latitude"].min()
max_latitude = data["latitude"].max()
latitude_delta = max_latitude - min_latitude
min_longtitude = data["longtitude"].min()
max_longtitude = data["longtitude"].max()
longtitude_delta = max_longtitude - min_longtitude
# NOTE: a zero delta (every point sharing one coordinate) is not handled and
# leaves non-finite values in the scaled column.
data["scaled_latitude"] = (data["latitude"] - min_latitude) / latitude_delta
data["scaled_longtitude"] = (data["longtitude"] - min_longtitude) / longtitude_delta
data
| region | district | blr_name | rus_name | id | latitude | longtitude | blr_ending | rus_ending | scaled_latitude | scaled_longtitude | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Брэсцкая | <вобласць> | Брэст | Брест | 27171628 | 52.093751 | 23.685185 | ст | ст | 0.165041 | 0.049754 |
| 1 | Брэсцкая | <вобласць> | Баранавічы | Барановичи | 242978911 | 53.132292 | 26.018416 | чы | чи | 0.379489 | 0.294650 |
| 2 | Брэсцкая | <вобласць> | Пінск | Пинск | 242978912 | 52.111361 | 26.102377 | ск | ск | 0.168677 | 0.303463 |
| 3 | Брэсцкая | Баранавіцкі | Гарадзішча | Городище | 242979015 | 53.326824 | 26.005677 | ча | ще | 0.419658 | 0.293313 |
| 4 | Брэсцкая | Баранавіцкі | Амнявічы | Омневичи | 242992435 | 53.355020 | 25.902420 | чы | чи | 0.425480 | 0.282475 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 23966 | Мінская | Чэрвеньскі | Пятроўка | Петровка | 243022922 | 53.694300 | 28.702980 | ка | ка | 0.495538 | 0.576422 |
| 23967 | Мінская | Чэрвеньскі | Ратнае | Ратное | 243025750 | 53.785280 | 28.748050 | ае | ое | 0.514324 | 0.581153 |
| 23968 | Мінская | Чэрвеньскі | Стары Пруд | Старый Пруд | 243022754 | 53.761984 | 28.620105 | уд | уд | 0.509514 | 0.567723 |
| 23969 | Мінская | Чэрвеньскі | Хутар | Хутор | 243022771 | 53.760812 | 28.684500 | ар | ор | 0.509272 | 0.574482 |
| 23970 | Мінская | Чэрвеньскі | Юравічы | Юровичи | 243022914 | 53.785000 | 28.691109 | чы | чи | 0.514266 | 0.575176 |
23225 rows × 11 columns
# Global matplotlib text defaults for the figures drawn below.
plt.rcParams["font.size"] = 22
plt.rcParams["text.color"] = "black"
plt.rcParams["axes.labelcolor"] = "orange"

# Drawing constants. prepare_plot() reads FIGURE_SIZE (used as both width and
# height), TICKS_FONT_SIZE and AXES_TICK_COLOR.
FIGURE_SIZE = 25
TICKS_FONT_SIZE = 20.0
AXES_TICK_COLOR = "orange"
POINTS_IN_CURVES = 315
STROKE_WIDTH = 2.5
MARKER_SIZE = 9.0
def prepare_plot():
    """Open a new square pyplot figure with labelled, styled axes.

    The figure is ``FIGURE_SIZE`` x ``FIGURE_SIZE``; both axes are limited to
    [-0.1, 1.1]. Nothing is returned: later pyplot calls draw on this figure.
    """
    plt.figure(figsize=(FIGURE_SIZE, FIGURE_SIZE))
    axes = plt.gca()
    axes.set(xlabel="Longtitude", ylabel="Latitude")
    axes.tick_params(axis="both", width=5, length=10, direction="inout", color=AXES_TICK_COLOR)
    # Tick labels are styled before the limits are set, in the same order as
    # the other pyplot calls here.
    tick_label_style = {"fontsize": TICKS_FONT_SIZE, "color": AXES_TICK_COLOR}
    plt.xticks(**tick_label_style)
    plt.yticks(**tick_label_style)
    axes.set_xlim(-0.1, 1.1)
    axes.set_ylim(-0.1, 1.1)
def filtered_plot(drawing_data, function, classes, showlegend=True):
    """Scatter-plot settlements, one series per class of their Belarusian name.

    Args:
        drawing_data: DataFrame with ``blr_name``, ``scaled_longtitude`` and
            ``scaled_latitude`` columns.
        function: Callable applied to every ``blr_name``; its result is the
            class of that row.
        classes: Class values to draw, in legend order. Rows whose class is
            not listed are skipped. Any iterable is accepted.
        showlegend: Whether to add a legend for the drawn classes.
    """
    plt.close()
    prepare_plot()
    classes = list(classes)
    xs = [[] for _ in classes]
    ys = [[] for _ in classes]
    rows = zip(
        drawing_data["blr_name"],
        drawing_data["scaled_longtitude"],
        drawing_data["scaled_latitude"],
    )
    for name, longtitude, latitude in rows:
        result = function(name)
        try:
            # One scan finds the position and tells us whether it is wanted.
            position = classes.index(result)
        except ValueError:
            continue
        xs[position].append(longtitude)
        ys[position].append(latitude)
    drew_any = False
    for label, x_values, y_values in zip(classes, xs, ys):
        # Classes without points are skipped, so they get no legend entry.
        if x_values:
            plt.scatter(x_values, y_values, 30.0, alpha=0.8, label=label)
            drew_any = True
    # Asking for a legend with no labelled series only produces a warning.
    if showlegend and drew_any:
        plt.legend()
    plt.show()
# Shuffled copy of the data (with a fresh 0..n-1 index) used for every plot.
drawing_data = data.sample(frac=1).reset_index(drop=True)
drawing_endings = list(blr_endings)[:5]
filtered_plot(drawing_data, get_ending, classes=drawing_endings)
filtered_plot(drawing_data, get_ending, classes=["цы"])
filtered_plot(drawing_data, get_ending, classes=["кі", "чы", "ка"])
# For thrill-seekers: classify by name length.
filtered_plot(drawing_data, len, classes=range(30))
def get_sh(word):
    """Return how many times the lowercase letter "ш" occurs in ``word``."""
    # Splitting on the letter yields one more piece than there are occurrences.
    return len(word.split("ш")) - 1
# Plots classified by the number of "ш" letters, then by selected name lengths.
filtered_plot(drawing_data, get_sh, classes=range(1, 20))
filtered_plot(drawing_data, get_sh, classes=[3])
filtered_plot(drawing_data, len, classes=[5, 6, 11, 12, 17, 18])
def count_special_letter_1(word):
    """Return how many times the lowercase letter "й" occurs in ``word``."""
    return sum(1 for letter in word if letter == "й")
def count_special_letter_2(word):
    """Return how many times the lowercase letter "ё" occurs in ``word``."""
    matches = [letter for letter in word if letter == "ё"]
    return len(matches)
# Plots classified by how many "й" letters, then "ё" letters, a name contains.
filtered_plot(drawing_data, count_special_letter_1, classes=range(1, 10))
filtered_plot(drawing_data, count_special_letter_2, classes=range(1, 10))
number_of_regions = 29
number_of_top_endings = 10
# Equal-width bins over the scaled [0, 1] range. The left edges are built
# from integer steps (i * width) rather than a float-step np.arange, whose
# element count is not reliable, so there are always exactly
# number_of_regions bins. The final right edge sits just above 1.
region_markers = np.append(np.arange(number_of_regions) * (1.0 / number_of_regions), 1.00000001)
# Consecutive markers form the (left_border, right_border) pairs.
regions = list(zip(region_markers[:-1], region_markers[1:]))
regions
[(0.0, 0.034482758620689655), (0.034482758620689655, 0.06896551724137931), (0.06896551724137931, 0.10344827586206896), (0.10344827586206896, 0.13793103448275862), (0.13793103448275862, 0.1724137931034483), (0.1724137931034483, 0.20689655172413793), (0.20689655172413793, 0.24137931034482757), (0.24137931034482757, 0.27586206896551724), (0.27586206896551724, 0.3103448275862069), (0.3103448275862069, 0.3448275862068966), (0.3448275862068966, 0.3793103448275862), (0.3793103448275862, 0.41379310344827586), (0.41379310344827586, 0.4482758620689655), (0.4482758620689655, 0.48275862068965514), (0.48275862068965514, 0.5172413793103449), (0.5172413793103449, 0.5517241379310345), (0.5517241379310345, 0.5862068965517241), (0.5862068965517241, 0.6206896551724138), (0.6206896551724138, 0.6551724137931034), (0.6551724137931034, 0.6896551724137931), (0.6896551724137931, 0.7241379310344828), (0.7241379310344828, 0.7586206896551724), (0.7586206896551724, 0.7931034482758621), (0.7931034482758621, 0.8275862068965517), (0.8275862068965517, 0.8620689655172413), (0.8620689655172413, 0.896551724137931), (0.896551724137931, 0.9310344827586207), (0.9310344827586207, 0.9655172413793103), (0.9655172413793103, 1.00000001)]
# One row per longitude bin, with a zeroed column for each of the most
# frequent endings and a zeroed per-bin total.
top_endings = list(blr_endings)[:number_of_top_endings]
regions_data = pd.DataFrame(regions, columns=["left_border", "right_border"])
regions_data = regions_data.assign(**{ending: 0.0 for ending in top_endings}, total=0)
regions_data
| left_border | right_border | кі | чы | ка | ва | на | цы | ны | ча | ца | ае | total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 0.034483 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 1 | 0.034483 | 0.068966 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 2 | 0.068966 | 0.103448 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 3 | 0.103448 | 0.137931 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 4 | 0.137931 | 0.172414 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 5 | 0.172414 | 0.206897 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 6 | 0.206897 | 0.241379 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 7 | 0.241379 | 0.275862 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 8 | 0.275862 | 0.310345 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 9 | 0.310345 | 0.344828 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 10 | 0.344828 | 0.379310 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 11 | 0.379310 | 0.413793 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 12 | 0.413793 | 0.448276 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 13 | 0.448276 | 0.482759 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 14 | 0.482759 | 0.517241 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 15 | 0.517241 | 0.551724 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 16 | 0.551724 | 0.586207 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 17 | 0.586207 | 0.620690 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 18 | 0.620690 | 0.655172 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 19 | 0.655172 | 0.689655 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 20 | 0.689655 | 0.724138 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 21 | 0.724138 | 0.758621 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 22 | 0.758621 | 0.793103 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 23 | 0.793103 | 0.827586 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 24 | 0.827586 | 0.862069 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 25 | 0.862069 | 0.896552 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 26 | 0.896552 | 0.931034 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 27 | 0.931034 | 0.965517 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 28 | 0.965517 | 1.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
# Share of each top ending inside every longitude bin. Counts are computed in
# one pass and assigned (not accumulated), so the cell can be re-run safely.
name_endings = data["blr_name"].map(get_ending)
is_top = name_endings.isin(top_endings)
top_names = name_endings[is_top].to_numpy()
top_longtitudes = data.loc[is_top, "scaled_longtitude"].to_numpy()

# Bins are half-open [left_border, right_border). searchsorted(side="right")
# gives the position of the last bin whose left border is <= the value.
left_borders = regions_data["left_border"].to_numpy()
right_borders = regions_data["right_border"].to_numpy()
bin_positions = np.searchsorted(left_borders, top_longtitudes, side="right") - 1
# Drop values that lie left of the first bin or at/after their bin's right border.
inside = bin_positions >= 0
inside &= top_longtitudes < right_borders[np.clip(bin_positions, 0, None)]

counts = pd.crosstab(bin_positions[inside], top_names[inside]).reindex(
    index=range(len(regions_data)), columns=top_endings, fill_value=0
)
totals = counts.sum(axis=1)
regions_data["total"] = totals.to_numpy()
# An empty bin has no defined shares: it gets NaN without dividing by zero.
regions_data[top_endings] = counts.div(totals.where(totals > 0), axis=0).to_numpy()
regions_data
| left_border | right_border | кі | чы | ка | ва | на | цы | ны | ча | ца | ае | total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 0.034483 | 0.258065 | 0.225806 | 0.112903 | 0.112903 | 0.096774 | 0.064516 | 0.000000 | 0.080645 | 0.032258 | 0.016129 | 62 |
| 1 | 0.034483 | 0.068966 | 0.223443 | 0.238095 | 0.131868 | 0.091575 | 0.058608 | 0.113553 | 0.084249 | 0.018315 | 0.032967 | 0.007326 | 273 |
| 2 | 0.068966 | 0.103448 | 0.280269 | 0.199552 | 0.076233 | 0.100897 | 0.076233 | 0.094170 | 0.100897 | 0.015695 | 0.035874 | 0.020179 | 446 |
| 3 | 0.103448 | 0.137931 | 0.254364 | 0.192020 | 0.067332 | 0.084788 | 0.089776 | 0.159601 | 0.087282 | 0.012469 | 0.037406 | 0.014963 | 401 |
| 4 | 0.137931 | 0.172414 | 0.273913 | 0.254348 | 0.082609 | 0.056522 | 0.076087 | 0.156522 | 0.060870 | 0.010870 | 0.015217 | 0.013043 | 460 |
| 5 | 0.172414 | 0.206897 | 0.244292 | 0.244292 | 0.089041 | 0.084475 | 0.075342 | 0.168950 | 0.052511 | 0.018265 | 0.018265 | 0.004566 | 438 |
| 6 | 0.206897 | 0.241379 | 0.274775 | 0.231982 | 0.040541 | 0.117117 | 0.078829 | 0.105856 | 0.103604 | 0.029279 | 0.015766 | 0.002252 | 444 |
| 7 | 0.241379 | 0.275862 | 0.273942 | 0.189310 | 0.102450 | 0.113586 | 0.115813 | 0.084633 | 0.082405 | 0.013363 | 0.017817 | 0.006682 | 449 |
| 8 | 0.275862 | 0.310345 | 0.240642 | 0.201872 | 0.073529 | 0.097594 | 0.110963 | 0.080214 | 0.131016 | 0.038770 | 0.024064 | 0.001337 | 748 |
| 9 | 0.310345 | 0.344828 | 0.281570 | 0.211604 | 0.064846 | 0.104096 | 0.122867 | 0.056314 | 0.092150 | 0.037543 | 0.015358 | 0.013652 | 586 |
| 10 | 0.344828 | 0.379310 | 0.249664 | 0.194631 | 0.063087 | 0.100671 | 0.173154 | 0.067114 | 0.075168 | 0.033557 | 0.022819 | 0.020134 | 745 |
| 11 | 0.379310 | 0.413793 | 0.245958 | 0.172055 | 0.081986 | 0.109700 | 0.193995 | 0.071594 | 0.062356 | 0.021940 | 0.017321 | 0.023095 | 866 |
| 12 | 0.413793 | 0.448276 | 0.222892 | 0.130522 | 0.123494 | 0.127510 | 0.202811 | 0.077309 | 0.046185 | 0.027108 | 0.019076 | 0.023092 | 996 |
| 13 | 0.448276 | 0.482759 | 0.212411 | 0.137232 | 0.120525 | 0.164678 | 0.130072 | 0.085919 | 0.051313 | 0.053699 | 0.022673 | 0.021480 | 838 |
| 14 | 0.482759 | 0.517241 | 0.191395 | 0.102374 | 0.143917 | 0.201780 | 0.140950 | 0.035608 | 0.028190 | 0.071217 | 0.034125 | 0.050445 | 674 |
| 15 | 0.517241 | 0.551724 | 0.188742 | 0.107616 | 0.142384 | 0.185430 | 0.150662 | 0.056291 | 0.034768 | 0.062914 | 0.031457 | 0.039735 | 604 |
| 16 | 0.551724 | 0.586207 | 0.158070 | 0.123128 | 0.183028 | 0.138103 | 0.134775 | 0.053245 | 0.031614 | 0.084859 | 0.046589 | 0.046589 | 601 |
| 17 | 0.586207 | 0.620690 | 0.141566 | 0.117470 | 0.212349 | 0.149096 | 0.155120 | 0.030120 | 0.037651 | 0.058735 | 0.052711 | 0.045181 | 664 |
| 18 | 0.620690 | 0.655172 | 0.181090 | 0.136218 | 0.213141 | 0.161859 | 0.113782 | 0.038462 | 0.040064 | 0.056090 | 0.035256 | 0.024038 | 624 |
| 19 | 0.655172 | 0.689655 | 0.188218 | 0.139368 | 0.191092 | 0.150862 | 0.120690 | 0.044540 | 0.027299 | 0.067529 | 0.040230 | 0.030172 | 696 |
| 20 | 0.689655 | 0.724138 | 0.192308 | 0.087533 | 0.184350 | 0.234748 | 0.131300 | 0.035809 | 0.034483 | 0.039788 | 0.022546 | 0.037135 | 754 |
| 21 | 0.724138 | 0.758621 | 0.170915 | 0.095952 | 0.211394 | 0.209895 | 0.124438 | 0.041979 | 0.032984 | 0.043478 | 0.028486 | 0.040480 | 667 |
| 22 | 0.758621 | 0.793103 | 0.156495 | 0.111111 | 0.272300 | 0.190923 | 0.111111 | 0.018779 | 0.026604 | 0.062598 | 0.031299 | 0.018779 | 639 |
| 23 | 0.793103 | 0.827586 | 0.176471 | 0.100840 | 0.283613 | 0.130252 | 0.142857 | 0.027311 | 0.054622 | 0.031513 | 0.023109 | 0.029412 | 476 |
| 24 | 0.827586 | 0.862069 | 0.173759 | 0.060284 | 0.368794 | 0.163121 | 0.088652 | 0.024823 | 0.039007 | 0.049645 | 0.010638 | 0.021277 | 282 |
| 25 | 0.862069 | 0.896552 | 0.184358 | 0.117318 | 0.273743 | 0.128492 | 0.162011 | 0.022346 | 0.022346 | 0.033520 | 0.022346 | 0.033520 | 179 |
| 26 | 0.896552 | 0.931034 | 0.077670 | 0.194175 | 0.339806 | 0.126214 | 0.106796 | 0.038835 | 0.009709 | 0.038835 | 0.029126 | 0.038835 | 103 |
| 27 | 0.931034 | 0.965517 | 0.149123 | 0.122807 | 0.535088 | 0.061404 | 0.061404 | 0.000000 | 0.008772 | 0.017544 | 0.000000 | 0.043860 | 114 |
| 28 | 0.965517 | 1.000000 | 0.035714 | 0.035714 | 0.750000 | 0.071429 | 0.071429 | 0.000000 | 0.035714 | 0.000000 | 0.000000 | 0.000000 | 28 |
# Midpoint of every bin, i.e. the mean of its two borders.
regions_data["average_longtitude"] = regions_data[["left_border", "right_border"]].mean(axis=1)
regions_data
| left_border | right_border | кі | чы | ка | ва | на | цы | ны | ча | ца | ае | total | average_longtitude | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 0.034483 | 0.258065 | 0.225806 | 0.112903 | 0.112903 | 0.096774 | 0.064516 | 0.000000 | 0.080645 | 0.032258 | 0.016129 | 62 | 0.017241 |
| 1 | 0.034483 | 0.068966 | 0.223443 | 0.238095 | 0.131868 | 0.091575 | 0.058608 | 0.113553 | 0.084249 | 0.018315 | 0.032967 | 0.007326 | 273 | 0.051724 |
| 2 | 0.068966 | 0.103448 | 0.280269 | 0.199552 | 0.076233 | 0.100897 | 0.076233 | 0.094170 | 0.100897 | 0.015695 | 0.035874 | 0.020179 | 446 | 0.086207 |
| 3 | 0.103448 | 0.137931 | 0.254364 | 0.192020 | 0.067332 | 0.084788 | 0.089776 | 0.159601 | 0.087282 | 0.012469 | 0.037406 | 0.014963 | 401 | 0.120690 |
| 4 | 0.137931 | 0.172414 | 0.273913 | 0.254348 | 0.082609 | 0.056522 | 0.076087 | 0.156522 | 0.060870 | 0.010870 | 0.015217 | 0.013043 | 460 | 0.155172 |
| 5 | 0.172414 | 0.206897 | 0.244292 | 0.244292 | 0.089041 | 0.084475 | 0.075342 | 0.168950 | 0.052511 | 0.018265 | 0.018265 | 0.004566 | 438 | 0.189655 |
| 6 | 0.206897 | 0.241379 | 0.274775 | 0.231982 | 0.040541 | 0.117117 | 0.078829 | 0.105856 | 0.103604 | 0.029279 | 0.015766 | 0.002252 | 444 | 0.224138 |
| 7 | 0.241379 | 0.275862 | 0.273942 | 0.189310 | 0.102450 | 0.113586 | 0.115813 | 0.084633 | 0.082405 | 0.013363 | 0.017817 | 0.006682 | 449 | 0.258621 |
| 8 | 0.275862 | 0.310345 | 0.240642 | 0.201872 | 0.073529 | 0.097594 | 0.110963 | 0.080214 | 0.131016 | 0.038770 | 0.024064 | 0.001337 | 748 | 0.293103 |
| 9 | 0.310345 | 0.344828 | 0.281570 | 0.211604 | 0.064846 | 0.104096 | 0.122867 | 0.056314 | 0.092150 | 0.037543 | 0.015358 | 0.013652 | 586 | 0.327586 |
| 10 | 0.344828 | 0.379310 | 0.249664 | 0.194631 | 0.063087 | 0.100671 | 0.173154 | 0.067114 | 0.075168 | 0.033557 | 0.022819 | 0.020134 | 745 | 0.362069 |
| 11 | 0.379310 | 0.413793 | 0.245958 | 0.172055 | 0.081986 | 0.109700 | 0.193995 | 0.071594 | 0.062356 | 0.021940 | 0.017321 | 0.023095 | 866 | 0.396552 |
| 12 | 0.413793 | 0.448276 | 0.222892 | 0.130522 | 0.123494 | 0.127510 | 0.202811 | 0.077309 | 0.046185 | 0.027108 | 0.019076 | 0.023092 | 996 | 0.431034 |
| 13 | 0.448276 | 0.482759 | 0.212411 | 0.137232 | 0.120525 | 0.164678 | 0.130072 | 0.085919 | 0.051313 | 0.053699 | 0.022673 | 0.021480 | 838 | 0.465517 |
| 14 | 0.482759 | 0.517241 | 0.191395 | 0.102374 | 0.143917 | 0.201780 | 0.140950 | 0.035608 | 0.028190 | 0.071217 | 0.034125 | 0.050445 | 674 | 0.500000 |
| 15 | 0.517241 | 0.551724 | 0.188742 | 0.107616 | 0.142384 | 0.185430 | 0.150662 | 0.056291 | 0.034768 | 0.062914 | 0.031457 | 0.039735 | 604 | 0.534483 |
| 16 | 0.551724 | 0.586207 | 0.158070 | 0.123128 | 0.183028 | 0.138103 | 0.134775 | 0.053245 | 0.031614 | 0.084859 | 0.046589 | 0.046589 | 601 | 0.568966 |
| 17 | 0.586207 | 0.620690 | 0.141566 | 0.117470 | 0.212349 | 0.149096 | 0.155120 | 0.030120 | 0.037651 | 0.058735 | 0.052711 | 0.045181 | 664 | 0.603448 |
| 18 | 0.620690 | 0.655172 | 0.181090 | 0.136218 | 0.213141 | 0.161859 | 0.113782 | 0.038462 | 0.040064 | 0.056090 | 0.035256 | 0.024038 | 624 | 0.637931 |
| 19 | 0.655172 | 0.689655 | 0.188218 | 0.139368 | 0.191092 | 0.150862 | 0.120690 | 0.044540 | 0.027299 | 0.067529 | 0.040230 | 0.030172 | 696 | 0.672414 |
| 20 | 0.689655 | 0.724138 | 0.192308 | 0.087533 | 0.184350 | 0.234748 | 0.131300 | 0.035809 | 0.034483 | 0.039788 | 0.022546 | 0.037135 | 754 | 0.706897 |
| 21 | 0.724138 | 0.758621 | 0.170915 | 0.095952 | 0.211394 | 0.209895 | 0.124438 | 0.041979 | 0.032984 | 0.043478 | 0.028486 | 0.040480 | 667 | 0.741379 |
| 22 | 0.758621 | 0.793103 | 0.156495 | 0.111111 | 0.272300 | 0.190923 | 0.111111 | 0.018779 | 0.026604 | 0.062598 | 0.031299 | 0.018779 | 639 | 0.775862 |
| 23 | 0.793103 | 0.827586 | 0.176471 | 0.100840 | 0.283613 | 0.130252 | 0.142857 | 0.027311 | 0.054622 | 0.031513 | 0.023109 | 0.029412 | 476 | 0.810345 |
| 24 | 0.827586 | 0.862069 | 0.173759 | 0.060284 | 0.368794 | 0.163121 | 0.088652 | 0.024823 | 0.039007 | 0.049645 | 0.010638 | 0.021277 | 282 | 0.844828 |
| 25 | 0.862069 | 0.896552 | 0.184358 | 0.117318 | 0.273743 | 0.128492 | 0.162011 | 0.022346 | 0.022346 | 0.033520 | 0.022346 | 0.033520 | 179 | 0.879310 |
| 26 | 0.896552 | 0.931034 | 0.077670 | 0.194175 | 0.339806 | 0.126214 | 0.106796 | 0.038835 | 0.009709 | 0.038835 | 0.029126 | 0.038835 | 103 | 0.913793 |
| 27 | 0.931034 | 0.965517 | 0.149123 | 0.122807 | 0.535088 | 0.061404 | 0.061404 | 0.000000 | 0.008772 | 0.017544 | 0.000000 | 0.043860 | 114 | 0.948276 |
| 28 | 0.965517 | 1.000000 | 0.035714 | 0.035714 | 0.750000 | 0.071429 | 0.071429 | 0.000000 | 0.035714 | 0.000000 | 0.000000 | 0.000000 | 28 | 0.982759 |
# Pearson correlation between each top ending's per-bin share and the bin's
# midpoint longitude. pandas computes it directly, skips bins whose share is
# NaN, and leaves the scaling variable ``longtitude_delta`` untouched.
# NOTE(review): mean_longtitude is no longer needed by this cell; it is kept
# in case a later cell reads it.
mean_longtitude = regions_data["average_longtitude"].mean()
correlations = regions_data[top_endings].astype(float).corrwith(regions_data["average_longtitude"])
# Single-row table: one column per ending, in the order of top_endings.
beautiful_pirson_table = correlations.reindex(top_endings).to_frame(name="Коэф. Пирсона").T
beautiful_pirson_table
| кі | чы | ка | ва | на | цы | ны | ча | ца | ае | |
|---|---|---|---|---|---|---|---|---|---|---|
| Коэф. Пирсона | -0.84751 | -0.81357 | 0.792674 | 0.372709 | 0.169812 | -0.829409 | -0.611021 | 0.156616 | -0.206303 | 0.511285 |
# One separate map for each of these endings.
for single_ending in ("кі", "чы", "ка", "ча"):
    filtered_plot(drawing_data, get_ending, [single_ending])